## This program takes the composite p value analysis and calculates Benjamini-Hochberg FDR q-values for each p-value
## Requires one argument: composite p value analysis
## > python gene_fdr.py mwu_sample.txt > fdr_sample_output.txt
## Output: composite pvals with FDR qvals

import operator, random, sys

## Create Dictionary with genes and P-vals {gene:[p-val],...}

gene_dict = {}

for line in open(sys.argv[1]):
    split = line.split('\t')
    #print split
    if len(split) >= 7:
        gene_dict[split[0]] = [split[5]]

#print gene_dict
## Function that calculates Benjamini-Hochberg FDR q-values. Argument required is ordered list of p-values.

def bh_qvalues(pv):
    """
    Return Benjamini-Hochberg FDR q-values corresponding to p-values C{pv}.

    This function implements an algorithm equivalent to L{bh_rejected} but
    yields a list of 'adjusted p-values', allowing for rejection decisions
    based on any given threshold.

    @type pv: list
    @param pv: p-values from a multiple statistical test

    @rtype: list
    @return: adjusted p-values to be compared directly with the desired FDR
      level
    """
    if not pv:
        return []
    m = len(pv)
    args, pv = zip(*sorted(enumerate(pv), None, operator.itemgetter(1)))
    #print args
    qvalues = m * [0]
    mincoeff = pv[-1]
    qvalues[args[-1]] = mincoeff
    for j in xrange(m-2, -1, -1):
        coeff = m*pv[j]/float(j+1)
        if coeff < mincoeff:
            mincoeff = coeff
        qvalues[args[j]] = mincoeff
    return qvalues

## Create ordered list of p_values with their corresponding genes

genes_list = []

for k,v in gene_dict.iteritems():
    genes_list.append([v[0],k])

genes_list.sort()

p_vals = []; genes = []

for i in genes_list:
    p_vals.append(float(i[0]))
    genes.append(i[1])
#print p_vals

## Assign q values for each p value

q_vals = bh_qvalues(p_vals)
#print q_vals

## Return q values to dictionary

for i in range(len(p_vals)):
    if genes[i] in gene_dict:
        gene_dict[genes[i]].append(q_vals[i])
#print gene_dict
## Print

for line in open(sys.argv[1]):
    split = line.split('\t')
    print "%s\t" % split[0],
    if split[0] in gene_dict: print gene_dict[split[0]][1],
    print
